This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

# Install tidyverse only when it is not already available, so that re-running
# the notebook does not download the package on every execution.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
also installing the dependencies ‘DBI’, ‘dbplyr’, ‘modelr’, ‘reprex’

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/DBI_1.0.0.tgz'
Content type 'application/x-gzip' length 876902 bytes (856 KB)
==================================================
downloaded 856 KB

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/dbplyr_1.4.2.tgz'
Content type 'application/x-gzip' length 569902 bytes (556 KB)
==================================================
downloaded 556 KB

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/modelr_0.1.5.tgz'
Content type 'application/x-gzip' length 198251 bytes (193 KB)
==================================================
downloaded 193 KB

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/reprex_0.3.0.tgz'
Content type 'application/x-gzip' length 423802 bytes (413 KB)
==================================================
downloaded 413 KB

trying URL 'https://cran.rstudio.com/bin/macosx/el-capitan/contrib/3.5/tidyverse_1.2.1.tgz'
Content type 'application/x-gzip' length 88754 bytes (86 KB)
==================================================
downloaded 86 KB

The downloaded binary packages are in
    /var/folders/jb/5s85b1c570b6rhdk3p3yr6xr0000gn/T//RtmpQVYylN/downloaded_packages
# Packages for plotting, data manipulation, plot layout, pipes and palettes.
# The load order determines which functions mask which, so it is left as is.
library(ggplot2)
library(dplyr)
library(gridExtra)
library(magrittr)
library(RColorBrewer)
# NOTE(review): attach() places a copy of Housesale's columns on the search
# path so later code can use bare names such as sqft_living. It runs before
# the read.csv() call that creates Housesale, so it presumably relies on an
# object left over from an earlier session -- TODO confirm, and consider using
# Housesale$<column> instead.
attach(Housesale)
The following objects are masked from Housesale (pos = 3):

    bathrooms, bedrooms, condition, date, floors, grade, id, isreno, lat, long, price, sqft_above,
    sqft_basement, sqft_living, sqft_living15, sqft_lot, sqft_lot15, view, waterfront, yr_built,
    yr_renovated, zipcode

The following objects are masked from Housesale (pos = 5):

    bathrooms, bedrooms, condition, date, floors, grade, id, isreno, lat, long, price, sqft_above,
    sqft_basement, sqft_living, sqft_living15, sqft_lot, sqft_lot15, view, waterfront, yr_built,
    yr_renovated, zipcode

The following objects are masked from Housesale (pos = 7):

    bathrooms, bedrooms, condition, date, floors, grade, id, isreno, lat, long, price, sqft_above,
    sqft_basement, sqft_living, sqft_living15, sqft_lot, sqft_lot15, view, waterfront, yr_built,
    yr_renovated, zipcode

The following objects are masked from Housesale (pos = 11):

    bathrooms, bedrooms, condition, date, floors, grade, id, lat, long, price, sqft_above,
    sqft_basement, sqft_living, sqft_living15, sqft_lot, sqft_lot15, view, waterfront, yr_built,
    yr_renovated, zipcode
library(readr)
library(caret)
library(corrplot)
library(caTools)
library(tidyverse)
── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ tibble  2.1.3     ✔ purrr   0.3.2
✔ tidyr   0.8.3     ✔ stringr 1.4.0
✔ tibble  2.1.3     ✔ forcats 0.4.0
── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ gridExtra::combine() masks dplyr::combine()
✖ tidyr::extract()     masks magrittr::extract()
✖ dplyr::filter()      masks stats::filter()
✖ dplyr::lag()         masks stats::lag()
✖ purrr::lift()        masks caret::lift()
✖ purrr::set_names()   masks magrittr::set_names()
library(randomForest)
randomForest 4.6-14
Type rfNews() to see new features/changes/bug fixes.

Attaching package: ‘randomForest’

The following object is masked from ‘package:gridExtra’:

    combine

The following object is masked from ‘package:dplyr’:

    combine

The following object is masked from ‘package:ggplot2’:

    margin
## Load the house-sale data from CSV and take a first look at it
getwd()
# NOTE(review): absolute, machine-specific path.
setwd("/Users/saileshraturi/Documents/GitHub/Regression-House-Sale")
Housesale <- read.csv(file = "kc_house_data.csv")
# Print the data, its first rows, and its structure
Housesale
head(Housesale)
str(Housesale)
'data.frame':   21613 obs. of  21 variables:
 $ id           : num  7.13e+09 6.41e+09 5.63e+09 2.49e+09 1.95e+09 ...
 $ date         : Factor w/ 372 levels "20140502T000000",..: 165 221 291 221 284 11 57 252 340 306 ...
 $ price        : num  221900 538000 180000 604000 510000 ...
 $ bedrooms     : int  3 3 2 4 3 4 3 3 3 3 ...
 $ bathrooms    : num  1 2.25 1 3 2 4.5 2.25 1.5 1 2.5 ...
 $ sqft_living  : int  1180 2570 770 1960 1680 5420 1715 1060 1780 1890 ...
 $ sqft_lot     : int  5650 7242 10000 5000 8080 101930 6819 9711 7470 6560 ...
 $ floors       : num  1 2 1 1 1 1 2 1 1 2 ...
 $ waterfront   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ view         : int  0 0 0 0 0 0 0 0 0 0 ...
 $ condition    : int  3 3 3 5 3 3 3 3 3 3 ...
 $ grade        : int  7 7 6 7 8 11 7 7 7 7 ...
 $ sqft_above   : int  1180 2170 770 1050 1680 3890 1715 1060 1050 1890 ...
 $ sqft_basement: int  0 400 0 910 0 1530 0 0 730 0 ...
 $ yr_built     : int  1955 1951 1933 1965 1987 2001 1995 1963 1960 2003 ...
 $ yr_renovated : int  0 1991 0 0 0 0 0 0 0 0 ...
 $ zipcode      : int  98178 98125 98028 98136 98074 98053 98003 98198 98146 98038 ...
 $ lat          : num  47.5 47.7 47.7 47.5 47.6 ...
 $ long         : num  -122 -122 -122 -122 -122 ...
 $ sqft_living15: int  1340 1690 2720 1360 1800 4760 2238 1650 1780 2390 ...
 $ sqft_lot15   : int  5650 7639 8062 5000 7503 101930 6819 9711 8113 7570 ...
# Per-column summary statistics of the raw data
summary(Housesale)
       id                         date           price            bedrooms        bathrooms      sqft_living   
 Min.   :1.000e+06   20140623T000000:  142   Min.   :  75000   Min.   : 0.000   Min.   :0.000   Min.   :  290  
 1st Qu.:2.123e+09   20140625T000000:  131   1st Qu.: 321950   1st Qu.: 3.000   1st Qu.:1.750   1st Qu.: 1427  
 Median :3.905e+09   20140626T000000:  131   Median : 450000   Median : 3.000   Median :2.250   Median : 1910  
 Mean   :4.580e+09   20140708T000000:  127   Mean   : 540088   Mean   : 3.371   Mean   :2.115   Mean   : 2080  
 3rd Qu.:7.309e+09   20150427T000000:  126   3rd Qu.: 645000   3rd Qu.: 4.000   3rd Qu.:2.500   3rd Qu.: 2550  
 Max.   :9.900e+09   20150325T000000:  123   Max.   :7700000   Max.   :33.000   Max.   :8.000   Max.   :13540  
                     (Other)        :20833                                                                     
    sqft_lot           floors        waterfront            view          condition         grade       
 Min.   :    520   Min.   :1.000   Min.   :0.000000   Min.   :0.0000   Min.   :1.000   Min.   : 1.000  
 1st Qu.:   5040   1st Qu.:1.000   1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.: 7.000  
 Median :   7618   Median :1.500   Median :0.000000   Median :0.0000   Median :3.000   Median : 7.000  
 Mean   :  15107   Mean   :1.494   Mean   :0.007542   Mean   :0.2343   Mean   :3.409   Mean   : 7.657  
 3rd Qu.:  10688   3rd Qu.:2.000   3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:4.000   3rd Qu.: 8.000  
 Max.   :1651359   Max.   :3.500   Max.   :1.000000   Max.   :4.0000   Max.   :5.000   Max.   :13.000  
                                                                                                       
   sqft_above   sqft_basement       yr_built     yr_renovated       zipcode           lat       
 Min.   : 290   Min.   :   0.0   Min.   :1900   Min.   :   0.0   Min.   :98001   Min.   :47.16  
 1st Qu.:1190   1st Qu.:   0.0   1st Qu.:1951   1st Qu.:   0.0   1st Qu.:98033   1st Qu.:47.47  
 Median :1560   Median :   0.0   Median :1975   Median :   0.0   Median :98065   Median :47.57  
 Mean   :1788   Mean   : 291.5   Mean   :1971   Mean   :  84.4   Mean   :98078   Mean   :47.56  
 3rd Qu.:2210   3rd Qu.: 560.0   3rd Qu.:1997   3rd Qu.:   0.0   3rd Qu.:98118   3rd Qu.:47.68  
 Max.   :9410   Max.   :4820.0   Max.   :2015   Max.   :2015.0   Max.   :98199   Max.   :47.78  
                                                                                                
      long        sqft_living15    sqft_lot15    
 Min.   :-122.5   Min.   : 399   Min.   :   651  
 1st Qu.:-122.3   1st Qu.:1490   1st Qu.:  5100  
 Median :-122.2   Median :1840   Median :  7620  
 Mean   :-122.2   Mean   :1987   Mean   : 12768  
 3rd Qu.:-122.1   3rd Qu.:2360   3rd Qu.: 10083  
 Max.   :-121.3   Max.   :6210   Max.   :871200  
                                                 

# Log10-transform living area and price.
# Columns are read from Housesale directly rather than through an attach()ed
# snapshot, so the values always reflect the current data frame.
log_sqftliving <- log10(Housesale$sqft_living)
Housesale <- Housesale %>% mutate(log_price = log10(price))
# Smallest and largest living area, and the spread between them
x <- min(Housesale$sqft_living)
y <- max(Housesale$sqft_living)
z <- (y - x)

# Taking the antilog of 2.9 and 3.6 (presumably the log10(sqft_living) range
# read off the histogram). The transforms above are base 10, so the inverse is
# 10^x; exp() would invert a natural log and give the wrong scale.
l1 <- 10^2.9
l1
[1] 794.3282
l2 <- 10^3.6
l2
[1] 3981.072
# Histogram of log10(price)
g1 <- ggplot(Housesale, aes(x = log_price)) +
  geom_histogram(fill = "red", binwidth = 0.10)
# Histogram of log10(sqft_living)
g2 <- ggplot(Housesale, aes(x = log_sqftliving)) +
  geom_histogram(fill = "blue", binwidth = 0.20)
 
# grid.arrange(g1,g2,nrow = 1,ncol = 2)

 # House price: most log10(price) values lie between 5.4 and 6, i.e. roughly $250,000 to $1,000,000.
 # Sqft living: most houses have a log10(sqft_living) between 2.9 and 3.6, i.e. roughly 800 to 4,000 sqft.
 
 # Histogram of bathroom counts, with the x axis limited to 1-8

 ggplot(Housesale, aes(x = bathrooms)) +
   geom_histogram(fill = "tomato", binwidth = 0.5) +
   scale_x_continuous(limits = c(1, 8))

# Largest and smallest bathroom count and their median absolute deviation,
# read from Housesale itself instead of an attach()ed copy of its columns.
x <- max(Housesale$bathrooms)
y <- min(Housesale$bathrooms)
z <- mad(Housesale$bathrooms)
# House price vs. size

# Jitter reduces over-plotting. The title has to be given as
# labs(title = ...); an unnamed string is not applied as a title.
# span only affects loess fits, so it is not passed to the lm smoother.
g3 <- ggplot(Housesale, aes(x = log_sqftliving, y = log_price)) +
  geom_jitter(alpha = 0.5, size = 2, color = "brown") +
  stat_smooth(method = "lm", se = FALSE) +
  labs(title = "sqftliving vs Price")

# House price vs. bedrooms

# Combined palette used for the continuous bedroom colour gradient
mycolors <- c(brewer.pal(name = "Dark2", n = 8), brewer.pal(name = "Paired", n = 6))
#Housesale = Housesale %>% filter(bedrooms < 30)
#Housesale
g4 <- ggplot(Housesale, aes(x = bedrooms, y = log_price, col = bedrooms)) +
  geom_point(alpha = 0.5, size = 2) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_gradientn(colors = mycolors)

# Show both plots side by side
grid.arrange(g3, g4, nrow = 1, ncol = 2)


#ggplot(aes(Housesale,x=bedrooms,y=log_price))+
#geom_point(alpha=0.5,size=2)+
#geom_smooth(method="lm",se=F)+
#labs("title=Bedrooms vs Price")+scale_color_gradientn(colors=mycolors)+theme(legend.position="none")

 # Basement size vs. log price, with a linear fit
 g5 <- ggplot(data = Housesale, mapping = aes(x = sqft_basement, y = log_price)) +
   geom_point(col = "green", alpha = 0.5) +
   stat_smooth(method = "lm", se = FALSE, alpha = 0.6, size = 0.5)

# Year built vs. log price, with an automatically chosen smoother
g6 <- ggplot(data = Housesale, mapping = aes(x = yr_built, y = log_price)) +
  geom_jitter(col = "blue", alpha = 0.5) +
  geom_smooth(method = "auto", se = TRUE)

# Show both plots side by side
grid.arrange(g5, g6, nrow = 1, ncol = 2)


# Number of houses at each condition rating
table(condition)
condition
    1     2     3     4     5 
   30   172 14031  5679  1701 
# Mean and standard deviation of log price, and house count, per condition
Housesale %>%
  group_by(factor(condition)) %>%
  summarise(
    mean_price = mean(log_price),
    sd = sd(log_price),
    count = n()
  )
# Distribution of log price by condition of the house

ggplot(
  data = Housesale,
  mapping = aes(x = factor(condition), y = log_price, fill = factor(condition))
) +
  geom_boxplot()


# Relationship between size, price and condition (one panel per condition)
ggplot(
  data = Housesale,
  mapping = aes(x = log_sqftliving, y = log_price, color = factor(condition))
) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", se = FALSE, color = "Black") +
  facet_wrap(~condition)


#grid.arrange(g7, g8, nrow = 1, ncol = 2)
# Number of houses per floor count, as a table and as a bar chart
table(floors)
Housesale %>%
  group_by(flr = factor(floors)) %>%
  summarise(floor_cnt = n()) %>%
  ggplot(aes(x = flr, y = floor_cnt, fill = flr)) +
  geom_bar(stat = "identity")

#hist(floors)
# Distribution of log price by floor count
ggplot(
  data = Housesale,
  mapping = aes(x = factor(floors), y = (log_price), fill = factor(floors))
) +
  geom_boxplot()

#ggplot(Housesale, aes(x = yr_built, y = log_price)) + geom_point()
#+ 
# Houses built per year (5-year bins)

Housesale %>%
  ggplot(aes(x = yr_built)) +
  geom_histogram(binwidth = 5, fill = rainbow(1), alpha = 0.5) +
  scale_x_continuous(limits = c(1900, 2016))

# Year built vs. size of house (log10 sqft)
options(repr.plot.width = 10, repr.plot.height = 6)
ggplot(Housesale, aes(x = factor(yr_built), y = log_sqftliving, fill = factor(yr_built))) +
  geom_boxplot() +
  theme(legend.position = "none")

# A fixed colour is a geom parameter. Inside aes() the string "green" would be
# mapped as a one-level variable (default palette colour plus a legend)
# instead of colouring the points green.
ggplot(Housesale, aes(x = yr_built, y = log_sqftliving)) +
  geom_jitter(alpha = 0.5, size = 0.5, color = "green") +
  stat_smooth(method = "auto", color = "black")

# Living area trends upward from 1950 onwards until 1990
# House view (waterfront)

table(Housesale$waterfront)

# Logical waterfront flag, computed from Housesale's own column rather than an
# attach()ed copy so that it always lines up with the current rows.
Housesale$houseview <- Housesale$waterfront == 1
#ggplot(Housesale, aes(x = houseview, y = log_price, fill = factor(waterfront))) + geom_boxplot()
# Most houses do not have a waterfront, while houses with a waterfront are more expensive

# Mean log price and number of houses with and without a waterfront
Housesale %>%
  group_by(houseview) %>%
  summarise(meanprice = mean(log_price), housecount = n())

# Size vs. price, one panel per waterfront flag
ggplot(Housesale, aes(x = log_sqftliving, y = log_price, col = houseview)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", size = 0.5, color = "black") +
  scale_color_manual(values = rainbow(n = 12)) +
  facet_wrap(~houseview)
# Sold houses with a waterfront are expensive and large, but few in number
# Number of houses per grade
table(grade)

# Grade vs. price

#ggplot(Housesale, aes(x = factor(grade), y = log_price, fill = factor(grade))) + geom_boxplot(alpha = 0.5)

# Size vs. price, one panel per grade

ggplot(
  data = Housesale,
  mapping = aes(x = log_sqftliving, y = log_price, color = factor(grade))
) +
  geom_point(alpha = 0.5) +
  facet_wrap(~grade) +
  geom_smooth(method = "lm", color = "black") +
  scale_color_manual(values = rainbow(n = 12)) +
  theme(legend.position = "none")
# Renovation year

table(Housesale$yr_renovated)

# Logical flag: TRUE when a renovation year is recorded (yr_renovated is not 0).
# Computed from Housesale's own column rather than an attach()ed copy so that
# it always lines up with the current rows.
Housesale$isreno <- Housesale$yr_renovated != 0
table(Housesale$isreno)

# Histogram of renovation year
ggplot(Housesale, aes(yr_renovated)) +
  geom_histogram(alpha = 0.5, binwidth = 1, fill = rainbow(1)) +
  scale_x_continuous(limits = c(1900, 2016))

#ggplot(Housesale, aes(x = factor(yr_renovated), y= log_price, fill = factor(yr_renovated))) + geom_boxplot()

# Year built vs. price, coloured by renovation year
ggplot(Housesale, aes(x = yr_built, y = log_price, col = yr_renovated)) +
  geom_jitter(alpha = 0.5)

# Renovation year vs. year built, one panel per renovation flag
ggplot(Housesale, aes(x = yr_built, y = yr_renovated, color = isreno)) +
  geom_jitter(alpha = 0.5) +
  facet_wrap(~isreno)
# Split the data into train (about 70%) and test (about 30%) subsets.
# sample.split() expects a vector with one label per observation. Given a whole
# data frame it samples over the columns, and the short mask is then recycled
# down the rows instead of drawing rows at random. Splitting on the price
# column yields one TRUE/FALSE flag per row.
# Outputs recorded further down were produced with the earlier split and will
# change when the notebook is re-run.
set.seed(0512)
sample <- sample.split(Housesale$price, SplitRatio = 0.70)
trainhs <- subset(Housesale, sample == TRUE)
tesths <- subset(Housesale, sample == FALSE)
# Row and column counts of the full data, then row counts of the two subsets
nrow(Housesale)
[1] 21613
ncol(Housesale)
[1] 22
nrow(trainhs)
[1] 14736
nrow(tesths)
[1] 6877
# Pairwise correlations of columns 3 to 21, drawn as a correlation plot

corr <- cor(x = Housesale[, 3:21])
corrplot(corr)

NA
NA
NA
# Linear regression of log(price) on predictors chosen from the correlation matrix

modellm1 <- lm(
  log(price) ~ bedrooms + bathrooms + sqft_living + waterfront + view +
    condition + grade + yr_built + yr_renovated + zipcode + lat +
    sqft_living15 + sqft_lot15,
  data = trainhs
)

summary(modellm1)
#plot(modellm1)
# Linear regression of log(price) on all original predictors.
# Columns 3:21 run from price to sqft_lot15. A 3:22 range would also pull in
# column 22, a derived column (log_price when the notebook is run top to
# bottom), which leaks the response into the predictors of `~ .`.
modellm2 <- lm(log(price) ~ ., data = trainhs[, 3:21])
summary(modellm2)
#plot(modellm2)
# Linear regression evaluated with k-fold cross-validation: the data is
# partitioned into random folds, each fold is held out once as a test set, and
# the accuracy metrics are averaged over the folds to estimate performance on
# unseen data.
#modellm3 = train(log(price) ~ .,data = trainhs[,3:22], method = "lm", trControl = trainControl(method = "cv",number = 10, savePredictions = TRUE))
# ggfortify provides autoplot() for lm fits; install it only when missing.
if (!requireNamespace("ggfortify", quietly = TRUE)) {
  install.packages("ggfortify")
}
library(ggfortify)
# Restrict the data to columns 3:21 (price to sqft_lot15) so that id, date and
# derived columns such as log_price are not swept into `price ~ .`.
modellm3 <- train(
  price ~ .,
  data = Housesale[, 3:21],
  method = "lm",
  trControl = trainControl(method = "cv", number = 5, verboseIter = TRUE)
)
summary(modellm3)
# Diagnostic plots are drawn from the underlying lm fit, not the caret wrapper.
autoplot(modellm3$finalModel, ncol = 2)
# Linear regression with linear and squared terms, evaluated with 10-fold
# cross-validation (each fold is held out once and the accuracy metrics are
# averaged to estimate performance on unseen data)
modellm6 <- train(
  log(price) ~ bedrooms + bathrooms + sqft_living + waterfront + view +
    condition + grade + yr_built + yr_renovated + lat + sqft_living15 +
    sqft_lot15 + I(bedrooms^2) + I(sqft_living^2) + I(view^2) + I(grade^2) +
    I(yr_built^2) + I(yr_renovated^2) + I(lat^2) + I(sqft_living15^2) +
    I(sqft_lot15^2),
  data = trainhs[, 3:22],
  method = "lm",
  trControl = trainControl(method = "cv", number = 10, savePredictions = TRUE)
)
summary(modellm6)
#parametergrid <- expand.grid(c(5,6,7,8))
#summary(modellm3)
# Residuals of modellm1 plotted against each predictor, with a smoother
g1 <- ggplot(data = trainhs, mapping = aes(x = bathrooms, y = residuals(modellm1))) + geom_point() + geom_smooth()
g2 <- ggplot(data = trainhs, mapping = aes(x = sqft_living, y = residuals(modellm1))) + geom_point() + geom_smooth()
g3 <- ggplot(data = trainhs, mapping = aes(x = view, y = residuals(modellm1))) + geom_point() + geom_smooth()
g4 <- ggplot(data = trainhs, mapping = aes(x = grade, y = residuals(modellm1))) + geom_point() + geom_smooth()
g5 <- ggplot(data = trainhs, mapping = aes(x = yr_built, y = residuals(modellm1))) + geom_point() + geom_smooth()
g6 <- ggplot(data = trainhs, mapping = aes(x = sqft_living15, y = residuals(modellm1))) + geom_point() + geom_smooth()
g7 <- ggplot(data = trainhs, mapping = aes(x = bedrooms, y = residuals(modellm1))) + geom_point() + geom_smooth()
g8 <- ggplot(data = trainhs, mapping = aes(x = waterfront, y = residuals(modellm1))) + geom_point() + geom_smooth()
g9 <- ggplot(data = trainhs, mapping = aes(x = lat, y = residuals(modellm1))) + geom_point() + geom_smooth()
g10 <- ggplot(data = trainhs, mapping = aes(x = sqft_lot15, y = residuals(modellm1))) + geom_point() + geom_smooth()
g11 <- ggplot(data = trainhs, mapping = aes(x = condition, y = residuals(modellm1))) + geom_point() + geom_smooth()
g12 <- ggplot(data = trainhs, mapping = aes(x = yr_renovated, y = residuals(modellm1))) + geom_point() + geom_smooth()
g13 <- ggplot(data = trainhs, mapping = aes(x = zipcode, y = residuals(modellm1))) + geom_point() + geom_smooth()

# Lay the thirteen plots out in three rows
grid.arrange(g1, g2, g3, g4, nrow = 1, ncol = 4)
grid.arrange(g5, g6, g7, g8, nrow = 1, ncol = 4)
grid.arrange(g9, g10, g11, g12, g13, nrow = 1, ncol = 5)

# Model with squared terms added for the non-linearity seen in the residual plots
#modellm4 = lm(log(price) ~  bedrooms + bathrooms + sqft_living + waterfront + view + condition + grade + yr_built + yr_renovated + zipcode  + lat + sqft_living15 + sqft_lot15 + I(bedrooms^2) + I(bathrooms^2) + I(sqft_living^2)+ I(waterfront^2)  + I(view^2) + I(condition^2) + I(grade^2) + I(yr_built^2) + I(yr_renovated^2)  +I(zipcode^2)  + I(lat^2) + I(sqft_living15^2)+ I(sqft_lot15^2) , data = trainhs)
#summary(modellm4)
#plot(modellm4)
modellm4 <- lm(
  log(price) ~ bedrooms + bathrooms + sqft_living + waterfront + view +
    condition + grade + yr_built + yr_renovated + zipcode + lat +
    sqft_living15 + sqft_lot15 + I(bedrooms^2) + I(bathrooms^2) +
    I(sqft_living^2) + I(grade^2) + I(yr_built^2) + I(yr_renovated^2) +
    I(zipcode^2) + I(lat^2) + I(sqft_living15^2) + I(sqft_lot15^2),
  data = trainhs
)
summary(modellm4)
# Model refitted after dropping the less significant terms identified in modellm4
#modellm5 = update(modellm4, ~.-zipcode-I(bathrooms^2)-I(waterfront^2)-I(condition^2)-I(zipcode^2))
modellm5 <- lm(
  log(price) ~ bedrooms + bathrooms + sqft_living + waterfront + view +
    condition + grade + yr_built + yr_renovated + lat + sqft_living15 +
    sqft_lot15 + I(bedrooms^2) + I(sqft_living^2) + I(view^2) + I(grade^2) +
    I(yr_built^2) + I(yr_renovated^2) + I(lat^2) + I(sqft_living15^2) +
    I(sqft_lot15^2),
  data = trainhs
)
summary(modellm5)

Call:
lm(formula = log(price) ~ bedrooms + bathrooms + sqft_living + 
    waterfront + view + condition + grade + yr_built + yr_renovated + 
    lat + sqft_living15 + sqft_lot15 + I(bedrooms^2) + I(sqft_living^2) + 
    I(view^2) + I(grade^2) + I(yr_built^2) + I(yr_renovated^2) + 
    I(lat^2) + I(sqft_living15^2) + I(sqft_lot15^2), data = trainhs)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.25879 -0.14747 -0.00418  0.14024  1.14221 

Coefficients:
                     Estimate Std. Error t value Pr(>|t|)    
(Intercept)        -7.067e+03  2.283e+02 -30.948  < 2e-16 ***
bedrooms           -2.185e-02  4.213e-03  -5.187 2.16e-07 ***
bathrooms           5.025e-02  4.649e-03  10.809  < 2e-16 ***
sqft_living         2.146e-04  9.006e-06  23.835  < 2e-16 ***
waterfront          4.151e-01  2.783e-02  14.918  < 2e-16 ***
view                9.124e-02  9.776e-03   9.333  < 2e-16 ***
condition           7.457e-02  3.418e-03  21.817  < 2e-16 ***
grade               1.809e-01  1.776e-02  10.188  < 2e-16 ***
yr_built           -2.041e-01  9.199e-03 -22.188  < 2e-16 ***
yr_renovated       -5.557e-03  6.422e-04  -8.653  < 2e-16 ***
lat                 3.050e+02  9.574e+00  31.857  < 2e-16 ***
sqft_living15       2.564e-04  1.569e-05  16.339  < 2e-16 ***
sqft_lot15          8.378e-07  1.205e-07   6.955 3.68e-12 ***
I(bedrooms^2)       8.598e-04  2.902e-04   2.962 0.003057 ** 
I(sqft_living^2)   -9.959e-09  1.247e-09  -7.985 1.51e-15 ***
I(view^2)          -9.144e-03  3.168e-03  -2.886 0.003904 ** 
I(grade^2)         -1.508e-03  1.107e-03  -1.363 0.172994    
I(yr_built^2)       5.134e-05  2.347e-06  21.880  < 2e-16 ***
I(yr_renovated^2)   2.806e-06  3.218e-07   8.720  < 2e-16 ***
I(lat^2)           -3.194e+00  1.007e-01 -31.714  < 2e-16 ***
I(sqft_living15^2) -3.648e-08  3.193e-09 -11.426  < 2e-16 ***
I(sqft_lot15^2)    -1.053e-12  2.992e-13  -3.520 0.000433 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.239 on 14714 degrees of freedom
Multiple R-squared:  0.7926,    Adjusted R-squared:  0.7923 
F-statistic:  2677 on 21 and 14714 DF,  p-value: < 2.2e-16
# Diagnostic plots for the fitted modellm5
plot(modellm5)

NA

# RMSE of the regression model on the test set, on the original price scale
# NOTE(review): modellm7 is fitted further down in this notebook, so this
# chunk only works once that later chunk has been run.

testmodel <- predict(modellm7, newdata = tesths[, 3:21])
# Predicted vs. actual price, with the y = x reference line
plot(exp(testmodel) ~ tesths$price)
#plot(testmodel ~ tesths$price)
abline(a = 0, b = 1)


# Predictions are on the log scale, so exponentiate before taking residuals
res1 <- exp(testmodel) - tesths$price
#res1 =  testmodel - tesths$price
#rmse <- sqrt(sum((exp(testmodel) - tesths$price)^2)/length(tesths$price))
rmse <- sqrt(mean(res1^2))
rmse
[1] 199403.9

# Random forest on columns 3:21, with variable importance recorded
modelrf <- randomForest(price ~ ., trainhs[, 3:21], mtry = 6, importance = TRUE)
summary(modelrf)
# Importance scores as a table and as a plot
importance(modelrf)
varImpPlot(modelrf, type = 2)
# NOTE(review): this adds the column vectors element-wise (it relies on the
# attach()ed columns) and the result is not used in the visible code;
# presumably a list of the selected predictors was intended -- TODO confirm.
para <- sqft_living + grade + lat + sqft_living15 + sqft_above + long + bathrooms + yr_built + view


# Linear regression of log(price) on nine selected predictors
modellm7 <- lm(
  log(price) ~ sqft_living + grade + lat + sqft_living15 + sqft_above +
    long + bathrooms + yr_built + view,
  data = trainhs[, 3:21]
)
summary(modellm7)

Call:
lm(formula = log(price) ~ sqft_living + grade + lat + sqft_living15 + 
    sqft_above + long + bathrooms + yr_built + view, data = trainhs[, 
    3:21])

Residuals:
     Min       1Q   Median       3Q      Max 
-1.79935 -0.16215  0.00189  0.16113  1.24437 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -5.151e+01  2.327e+00 -22.133  < 2e-16 ***
sqft_living    1.274e-04  6.197e-06  20.553  < 2e-16 ***
grade          1.681e-01  3.300e-03  50.925  < 2e-16 ***
lat            1.333e+00  1.596e-02  83.499  < 2e-16 ***
sqft_living15  9.225e-05  5.284e-06  17.459  < 2e-16 ***
sqft_above     1.332e-05  6.033e-06   2.208  0.02729 *  
long          -5.337e-02  1.777e-02  -3.003  0.00268 ** 
bathrooms      8.879e-02  4.723e-03  18.800  < 2e-16 ***
yr_built      -3.712e-03  9.879e-05 -37.572  < 2e-16 ***
view           7.622e-02  3.037e-03  25.095  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2587 on 14726 degrees of freedom
Multiple R-squared:  0.7569,    Adjusted R-squared:  0.7567 
F-statistic:  5093 on 9 and 14726 DF,  p-value: < 2.2e-16
# RMSE of the random forest model on the test set
testmodel_rf <- predict(modelrf, tesths)
#p1 = plot(tesths$price ~ exp(testmodel))
#abline(a =0, b =1)
#p2 = plot(tesths$price ~ testmodel_rf)
#abline(a =0, b =1)

#grid.arrange(p1,p2, nrow = 1, ncol = 2)
# The forest was fitted on price directly, so no back-transform is applied
res1 <- testmodel_rf - tesths$price
rmse <- sqrt(mean(res1^2))
rmse
[1] 135938.7
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgoKCkFkZCBhIG5ldyBjaHVuayBieSBjbGlja2luZyB0aGUgKkluc2VydCBDaHVuayogYnV0dG9uIG9uIHRoZSB0b29sYmFyIG9yIGJ5IHByZXNzaW5nICpDbWQrT3B0aW9uK0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDbWQrU2hpZnQrSyogdG8gcHJldmlldyB0aGUgSFRNTCBmaWxlKS4gCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCgpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShkcGx5cikKbGlicmFyeShncmlkRXh0cmEpCmxpYnJhcnkobWFncml0dHIpCmxpYnJhcnkoUkNvbG9yQnJld2VyKQphdHRhY2goSG91c2VzYWxlKQpsaWJyYXJ5KHJlYWRyKQpsaWJyYXJ5KGNhcmV0KQpsaWJyYXJ5KGNvcnJwbG90KQpsaWJyYXJ5KGNhVG9vbHMpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KHJhbmRvbUZvcmVzdCkKCmBgYAoKYGBge3J9CiMjIEltcG9ydGluZyBkYXRhIGZyb20gQ1NWIGFuZCBzdW1tYXJ5CmdldHdkKCkKc2V0d2QoIi9Vc2Vycy9zYWlsZXNocmF0dXJpL0RvY3VtZW50cy9HaXRIdWIvUmVncmVzc2lvbi1Ib3VzZS1TYWxlIikKSG91c2VzYWxlID0gcmVhZC5jc3YoImtjX2hvdXNlX2RhdGEuY3N2IikKSG91c2VzYWxlCmhlYWQoSG91c2VzYWxlKQpzdHIoSG91c2VzYWxlKQpzdW1tYXJ5KEhvdXNlc2FsZSkKYGBgCgpgYGB7cn0KCmxvZ19zcWZ0bGl2aW5nID0gbG9nMTAoc3FmdF9saXZpbmcpCkhvdXNlc2FsZSA9IEhvdXNlc2FsZSAlPiUgbXV0YXRlKGxvZ19wcmljZSA9IGxvZzEwKHByaWNlKSkKeCA9IG1pbihzcWZ0X2xpdmluZykKeSA9IG1heChzcWZ0X2xpdmluZykKeiA9ICh5LXgpCgojdGFraW5n
IGFuaXRsb2cKbDEgPSBleHAoMi45KQpsMQpsMiA9IGV4cCgzLjYpCmwyCiNQbG90IGZvciBQcmljZQpnMSA9IGdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gbG9nX3ByaWNlKSkgKyBnZW9tX2hpc3RvZ3JhbShmaWxsID0gInJlZCIsIGJpbndpZHRoID0gLjEwKQojUGxvdCBmb3Igc3FmdGxpdmluZwpnMiA9IGdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gbG9nX3NxZnRsaXZpbmcpKSArIGdlb21faGlzdG9ncmFtKGZpbGwgPSAiYmx1ZSIsIGJpbndpZHRoID0gLjIwKQogCiMgZ3JpZC5hcnJhbmdlKGcxLGcyLG5yb3cgPSAxLG5jb2wgPSAyKQoKICNIb3VzZSBQcmljZSA6IFBsb3QgcmVmbGVjdCB0aGUgbW9zdCBvZiB0aGUgcHJpY2VzIG9mIGhvdXNlIGlzIGxpZSBiZXR3ZWVuIDUuNCB0byA2IG1pbGxpb24uCiAjU3FmdGxpdmluZyA6ICBNb3N0IG9mIGhvdXNlcyBoYXZlICBzcWZ0IGxpdmluZyBiZXR3ZWVuIDE4MDAgdG8gMzYwMCBzcWZ0LgogCgoKYGBgCmBgYHtyfQogI3Bsb3QgZm9yIGJhdGhyb29tCiAKIGdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gYmF0aHJvb21zKSkgKyBnZW9tX2hpc3RvZ3JhbShmaWxsID0gInRvbWF0byIsIGJpbndpZHRoID0gMC41KSArIHNjYWxlX3hfY29udGludW91cyhsaW1pdHMgPSBjKDEsOCkpCnggPSBtYXgoYmF0aHJvb21zKQp5ID0gbWluKGJhdGhyb29tcykKeiA9IG1hZChiYXRocm9vbXMpCmBgYApgYGB7cn0KI0hvdXNlIFByaWNlIHZzIHNpemUKCiNqaXR0ZXIgdXNlZCB0byByZWR1Y2Ugb3ZlcmxhcHBpbmcKZzMgPSBnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IGxvZ19zcWZ0bGl2aW5nLCB5ID0gbG9nX3ByaWNlKSkgKyAgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUsIHNpemUgPSAyLCBjb2xvciA9ICJicm93biIpICsgc3RhdF9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGLCBzcGFuID0gMC43KSArIGxhYnMoInRpdGxlID0gc3FmdGxpdmluZyB2cyBQcmljZSIpCgojSG91c2VQcmljZSB2cyBCZWRyb29tCgpteWNvbG9ycyA9IGMoYnJld2VyLnBhbChuYW1lID0gIkRhcmsyIiwgbj04KSwgYnJld2VyLnBhbChuYW1lPSJQYWlyZWQiLCBuPTYpKQojSG91c2VzYWxlID0gSG91c2VzYWxlICU+JSBmaWx0ZXIoYmVkcm9vbXMgPCAzMCkKI0hvdXNlc2FsZQpnNCA9IGdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gYmVkcm9vbXMsIHkgPSBsb2dfcHJpY2UsIGNvbCA9IGJlZHJvb21zKSkgKyBnZW9tX3BvaW50KGFscGhhID0gMC41LCBzaXplID0gMikgKyBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLHNlID0gRikgKyBzY2FsZV9jb2xvcl9ncmFkaWVudG4oY29sb3JzID0gbXljb2xvcnMpCgpncmlkLmFycmFuZ2UoZzMsZzQsbnJvdyA9IDEsIG5jb2wgPSAyKQoKI2dncGxvdChhZXMoSG91c2VzYWxlLHg9YmVkcm9vbXMseT1sb2dfcHJpY2UpKSsKI2dlb21fcG9pbnQoYWxwaGE9MC41LHNpemU9MikrCiNnZW9tX3Ntb290aChtZXRob2Q9ImxtIixzZT1GKSsKI2xhYnMoInRpdGxlPUJlZHJvb21zIHZz
IFByaWNlIikrc2NhbGVfY29sb3JfZ3JhZGllbnRuKGNvbG9ycz1teWNvbG9ycykrdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikKCiMgcG9zaXRpdmUgcmVsYXRpb25zaGlwIG9mIGJlZHJvb20gYW5kIHNpemUgdnMgUHJpY2UKYGBgCmBgYHtyfQoKIGc1ID0gZ2dwbG90KEhvdXNlc2FsZSwgYWVzKHggPSBzcWZ0X2Jhc2VtZW50LCB5ID0gbG9nX3ByaWNlKSkgKyBnZW9tX3BvaW50KCBjb2wgPSAiZ3JlZW4iLCBhbHBoYSA9IDAuNSkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IEYsIGFscGhhID0gMC42LCBzaXplID0gMC41KQoKZzYgPSBnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IHlyX2J1aWx0LCB5ID0gbG9nX3ByaWNlKSkgKyBnZW9tX2ppdHRlcihjb2wgPSAiYmx1ZSIsIGFscGhhID0gMC41KSArIGdlb21fc21vb3RoKG1ldGhvZCA9ICJhdXRvIiwgc2UgPSBUKQoKZ3JpZC5hcnJhbmdlKGc1LGc2LG5yb3cgPSAxLCBuY29sID0gMikKCiMgUG9zaXRpdmUgcmVsYXRpb25zaGlwIG9mIGJhc2VtZW50IHNpemUgYW5kIFByaWNlIHdoZXJlYXMgcHJpY2UgaW5jcmVhc2UgZm9yIGhvdXNlcyBidWlsdCBhZnRlciAxOTc1CgpgYGAKYGBge3J9Cgp0YWJsZShjb25kaXRpb24pCkhvdXNlc2FsZSAlPiUgZ3JvdXBfYnkoZmFjdG9yKGNvbmRpdGlvbikpICU+JSBzdW1tYXJpc2UobWVhbl9wcmljZSA9IG1lYW4obG9nX3ByaWNlKSwgc2QgPSBzZChsb2dfcHJpY2UpLCBjb3VudCA9IG4oKSkKYGBgCmBgYHtyfQojIERpc3RyaWJ1dGlvbiBvZiBIb3VzZXByaWNlIGFjY29yZGluZyB0byBjb25kaXRpb24gb2YgaG91c2UKCmdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gZmFjdG9yKGNvbmRpdGlvbiksIHkgPSBsb2dfcHJpY2UsIGZpbGwgPSBmYWN0b3IoY29uZGl0aW9uKSkpICsgZ2VvbV9ib3hwbG90KCkKCiMgUmVsYXRpb25zaGlwIGJldHdlZW4sIHNpemUsIHByaWNlIGFuZCBjb25kaXRpb24KZ2dwbG90KEhvdXNlc2FsZSwgYWVzKHggPSBsb2dfc3FmdGxpdmluZywgeSA9IGxvZ19wcmljZSwgY29sb3IgPSBmYWN0b3IoY29uZGl0aW9uKSkpICsgZ2VvbV9wb2ludChhbHBoYSA9IDAuNSkgKyAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgc2UgPSBGLCBjb2xvciA9ICJCbGFjayIpICsgZmFjZXRfd3JhcCh+Y29uZGl0aW9uKSAKCiNncmlkLmFycmFuZ2UoZzcsIGc4LCBucm93ID0gMSwgbmNvbCA9IDIpCgpgYGAKYGBge3J9CnRhYmxlKGZsb29ycykKSG91c2VzYWxlICU+JSBncm91cF9ieShmbHIgPSBmYWN0b3IoZmxvb3JzKSkgJT4lIHN1bW1hcmlzZShmbG9vcl9jbnQgPSBuKCkpICU+JQpnZ3Bsb3QoYWVzKHggPSBmbHIsZmxvb3JfY250LCBmaWxsID0gZmxyKSkgKyBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikKCiNoaXN0KGZsb29ycykKCmBgYAoKYGBge3J9CmdncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gZmFjdG9yKGZsb29ycyksIHkgPSAobG9nX3ByaWNlKSxmaWxsID0gZmFjdG9yKGZsb29ycykpKSArIGdlb21f
Ym94cGxvdCgpCgojIG1lYW4gcHJpY2UgdGVuZHMgdG8gaW5jcmVhc2UgZnJvbSAxIHRvIDIuNSBidXQgYWZ0ZXJ3YWRzIGRlY2xpbmUgaW4gbWVhbiBwcmljZQoKYGBgCmBgYHtyfQojZ2dwbG90KEhvdXNlc2FsZSwgYWVzKHggPSB5cl9idWlsdCwgeSA9IGxvZ19wcmljZSkpICsgZ2VvbV9wb2ludCgpCiMrIAojSG91c2VzIGJ1bGl0IHllYXJseQoKSG91c2VzYWxlICU+JSBnZ3Bsb3QoYWVzKHggPSB5cl9idWlsdCkpICsgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGggPSA1LCBmaWxsID0gcmFpbmJvdygxKSwgYWxwaGEgPSAwLjUpICsgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoMTkwMCwyMDE2KSkKYGBgCmBgYHtyfQojIEhvdXNlIGJ1aWx0IHllYXIgd2lzZSB2cyBzaXplIG9mIGhvdXNlKHNxZnQpCm9wdGlvbnMocmVwci5wbG90LndpZHRoID0gMTAsIHJlcHIucGxvdC5oZWlnaHQgPSA2KQpnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IGZhY3Rvcih5cl9idWlsdCksIHkgPSBsb2dfc3FmdGxpdmluZywgZmlsbCA9IGZhY3Rvcih5cl9idWlsdCkpKSArIGdlb21fYm94cGxvdCgpICsgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSAKCmdncGxvdChIb3VzZXNhbGUsIGFlcyh4PXlyX2J1aWx0LCB5ID0gbG9nX3NxZnRsaXZpbmcsIGNvbG9yID0gImdyZWVuIikpICsgZ2VvbV9qaXR0ZXIoYWxwaGEgPTAuNSwgc2l6ZSA9IDAuNSkgKyBzdGF0X3Ntb290aChtZXRob2QgPSAiYXV0byIsIGNvbG9yID0gImJsYWNrIikKCiMgdHJlbmQgb2YgaW5jcmVhc2UgaW4gc3FmdCBsaXZpbmcgMTk1MCBvbndhcmRzIHRpbGwgMTk5MAoKYGBgCmBgYHtyfQojSG91c2UgVmlldwoKdGFibGUoSG91c2VzYWxlJHdhdGVyZnJvbnQpCgpIb3VzZXNhbGUkaG91c2V2aWV3ID0gaWZlbHNlKHdhdGVyZnJvbnQgPT0xLFRSVUUsRkFMU0UpCiNnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IGhvdXNldmlldywgeSA9IGxvZ19wcmljZSwgZmlsbCA9IGZhY3Rvcih3YXRlcmZyb250KSkpICsgZ2VvbV9ib3hwbG90KCkKIyBNb3N0IG9mIHRoZSBob3VzZXMgZG9lc25vdCBoYXZlIHdhdGVyZnJvbnQgd2hpbGUgaG91c2VzIHdpdGggd2F0ZXJmcm9udCBhcmUgbW9yZSBleHBlbnNpdmUKCkhvdXNlc2FsZSAlPiUgZ3JvdXBfYnkoaG91c2V2aWV3KSAlPiUgc3VtbWFyaXNlKG1lYW5wcmljZSA9IG1lYW4obG9nX3ByaWNlKSxob3VzZWNvdW50ID0gbigpICkKCmdncGxvdChIb3VzZXNhbGUsYWVzKCB4ID0gbG9nX3NxZnRsaXZpbmcsIHk9IGxvZ19wcmljZSwgY29sID0gaG91c2V2aWV3KSkgKyBnZW9tX3BvaW50KGFscGhhID0gMC41KSArIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIgLHNpemUgPTAuNSwgY29sb3IgPSAiYmxhY2siKSArIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXMgPSByYWluYm93KG49MTIpKSArZmFjZXRfd3JhcCh+aG91c2V2aWV3KQojIHNvbGQgaG91c2VzIHdoaWNoIGhhdmUgd2F0ZXJmcm9udCBhcmUgZXhwZW5zaXZlIGFuZCBoaWdoIHNx
ZnRsaXZpbmcgYnV0IGxlc3MgaW4gbnVtYmVyCgpgYGAKCmBgYHtyfQp0YWJsZShncmFkZSkKCiNncmFkZSB2cyBwcmljZQoKI2dncGxvdChIb3VzZXNhbGUsIGFlcyh4ID0gZmFjdG9yKGdyYWRlKSwgeSA9IGxvZ19wcmljZSwgZmlsbCA9IGZhY3RvcihncmFkZSkpKSArIGdlb21fYm94cGxvdChhbHBoYSA9IDAuNSkKCiMgZ3JhZGUgdnMgc3FmdGxpdmluZyB2cyBwcmljZQoKZ2dwbG90KEhvdXNlc2FsZSwgYWVzKHggPSBsb2dfc3FmdGxpdmluZywgeSA9IGxvZ19wcmljZSwgY29sb3IgPSBmYWN0b3IoZ3JhZGUpKSkgKyBnZW9tX3BvaW50KGFscGhhID0gMC41KSArIGZhY2V0X3dyYXAofmdyYWRlKSArIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIGNvbG9yID0gImJsYWNrIikgKyBzY2FsZV9jb2xvcl9tYW51YWwodmFsdWVzID0gcmFpbmJvdyhuPTEyKSkgKyB0aGVtZShsZWdlbmQucG9zaXRpb24gPSAibm9uZSIpCgpgYGAKYGBge3J9CiMgUmVub3ZhdGUgeWVhcgoKdGFibGUoSG91c2VzYWxlJHlyX3Jlbm92YXRlZCkKCkhvdXNlc2FsZSRpc3Jlbm8gPSBpZmVsc2UoeXJfcmVub3ZhdGVkID09IDAsIEZBTFNFLCBUUlVFKQp0YWJsZShIb3VzZXNhbGUkaXNyZW5vKQoKIyBoaXN0b2dyYW0gb2YgeXIgcmVub3ZhdGVkCmdncGxvdChIb3VzZXNhbGUsIGFlcyh5cl9yZW5vdmF0ZWQpKSArIGdlb21faGlzdG9ncmFtKGFscGhhID0gMC41LGJpbndpZHRoID0gMSwgZmlsbCA9IHJhaW5ib3coMSkpICsgc2NhbGVfeF9jb250aW51b3VzKGxpbWl0cyA9IGMoMTkwMCwyMDE2KSkKCiNnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IGZhY3Rvcih5cl9yZW5vdmF0ZWQpLCB5PSBsb2dfcHJpY2UsIGZpbGwgPSBmYWN0b3IoeXJfcmVub3ZhdGVkKSkpICsgZ2VvbV9ib3hwbG90KCkKCiMgeWVhciBidWlsdCB2cyBwcmljZSB2cyByZW5vdmF0ZQpnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IHlyX2J1aWx0LCB5ID0gbG9nX3ByaWNlLCBjb2wgPSB5cl9yZW5vdmF0ZWQpKSArIGdlb21faml0dGVyKGFscGhhID0gMC41KQoKI1Jlbm92YXRlIHllYXIgdnMgeWVhciBidWlsdApnZ3Bsb3QoSG91c2VzYWxlLCBhZXMoeCA9IHlyX2J1aWx0LCB5ID0geXJfcmVub3ZhdGVkLCBjb2xvciA9IGlzcmVubykpICsgZ2VvbV9qaXR0ZXIoYWxwaGEgPSAwLjUpICsgZmFjZXRfd3JhcCh+aXNyZW5vKQoKCmBgYAoKYGBge3J9CiNzcGxpdHRpbmcgdGhlIGRhdGEgaW50byB0cmFpbiBhbmQgdGVzdCBzdWJzZXQKc2V0LnNlZWQoMDUxMikKc2FtcGxlID0gc2FtcGxlLnNwbGl0KEhvdXNlc2FsZSwgU3BsaXRSYXRpbyA9IC43MCkKdHJhaW5ocyA9IHN1YnNldChIb3VzZXNhbGUsIHNhbXBsZSA9PSBUUlVFKQp0ZXN0aHMgPSBzdWJzZXQoSG91c2VzYWxlLCBzYW1wbGUgPT0gRkFMU0UpCm5yb3coSG91c2VzYWxlKQpuY29sKEhvdXNlc2FsZSkKbnJvdyh0cmFpbmhzKQpucm93KHRlc3RocykKCgpgYGAKCgpgYGB7cn0KIyB2YXJpYWJsZSBzaWduaWZpY2FuY2UgYW5kIGNoZWNraW5n
IGNvcnJlbGF0aW9uCgpjb3JyID0gIGNvcihIb3VzZXNhbGVbLDM6MjFdKQpjb3JycGxvdChjb3JyKQoKCgpgYGAKCmBgYHtyfQojIG1vZGVsIGNyZWF0aW9uIHVzaW5nIGxpbmVhciByZWdyZXNzaW9uKHBhcmFtZXRlcnMgb24gYmFzaXMgb2YgY29ycmVsZWF0aW9uIG1hdHJpeCkKCm1vZGVsbG0xID0gbG0obG9nKHByaWNlKSB+IGJlZHJvb21zICsgYmF0aHJvb21zICsgc3FmdF9saXZpbmcgKyB3YXRlcmZyb250ICsgdmlldyArIGNvbmRpdGlvbiArIGdyYWRlICsgeXJfYnVpbHQgKyB5cl9yZW5vdmF0ZWQgKyB6aXBjb2RlICArIGxhdCArIHNxZnRfbGl2aW5nMTUgKyBzcWZ0X2xvdDE1ICAgLCBkYXRhID0gdHJhaW5ocykKCnN1bW1hcnkobW9kZWxsbTEpCiNwbG90KG1vZGVsbG0xKQpgYGAKCmBgYHtyfQojIG1vZGVsIGNyZWF0aW9uIHVzaW5nIGxpbmVhciByZWdyZXNzaW9uIG9yIHRyYWluaW5nIG1vZGVsKCBhbGwgcGFyYW1ldGVycykKbW9kZWxsbTIgPSBsbShsb2cocHJpY2UpIH4gLiwgZGF0YSA9IHRyYWluaHNbLDM6MjJdKQpzdW1tYXJ5KG1vZGVsbG0yKQojcGxvdChtb2RlbGxtMikKCmBgYApgYGB7cn0KIyBtb2RlbCBjcmVhdGlvbiB1c2luZyBsaW5lYXIgcmVncmVzc2lvbiB1c2luZyBjcm9zcy12YWxpZGF0aW9uIHRlY2huaXF1ZShwYXJ0aXRpb25pbmcgZGF0YXNldCBpbnRvIHJhbmRvbSBwYXJ0aXRpb24gLSB0cmFpbiBhbmQgdGVzdChudW1iZXIgb2YgZm9sZHMpLCBhdmVyYWdlIG9mIGFjY3VyYWN5IG1ldHJpY3MgZm9yIGFsbCBmb2xkcyB0YWtlbiB0byBjb21lIGFjcm9zcyBob3cgdHJhaW5pbmcgbW9kZWwgd2lsbCBwZXJmb3JtIG9uIHVua25vd24gdGVzdCBkYXRhc2V0KQojbW9kZWxsbTMgPSB0cmFpbihsb2cocHJpY2UpIH4gLixkYXRhID0gdHJhaW5oc1ssMzoyMl0sIG1ldGhvZCA9ICJsbSIsIHRyQ29udHJvbCA9IHRyYWluQ29udHJvbChtZXRob2QgPSAiY3YiLG51bWJlciA9IDEwLCBzYXZlUHJlZGljdGlvbnMgPSBUUlVFKSkKaW5zdGFsbC5wYWNrYWdlcygiZ2dmb3J0aWZ5IikKbW9kZWxsbTMgPSB0cmFpbihwcmljZSB+IC4sZGF0YSA9IEhvdXNlc2FsZSwgbWV0aG9kID0gImxtIiwgdHJDb250cm9sID0gdHJhaW5Db250cm9sKG1ldGhvZCA9ICJjdiIsbnVtYmVyID0gNSwgdmVyYm9zZUl0ZXIgPSBUUlVFKSkKc3VtbWFyeShtb2RlbGxtMykKbGlicmFyeShnZ2ZvcnRpZnkpCmF1dG9wbG90KG1vZGVsbG0zLG5jb2wgPSAyKQoKCgpgYGAKYGBge3J9CiMgbW9kZWwgY3JlYXRpb24gdXNpbmcgbGluZWFyIHJlZ3Jlc3Npb24gdXNpbmcgY3Jvc3MtdmFsaWRhdGlvbiB0ZWNobmlxdWUocGFydGl0aW9uaW5nIGRhdGFzZXQgaW50byByYW5kb20gcGFydGl0aW9uIC0gdHJhaW4gYW5kIHRlc3QobnVtYmVyIG9mIGZvbGRzKSwgYXZlcmFnZSBvZiBhY2N1cmFjeSBtZXRyaWNzIGZvciBhbGwgZm9sZHMgdGFrZW4gdG8gY29tZSBhY3Jvc3MgaG93IHRyYWluaW5nIG1vZGVsIHdpbGwgcGVyZm9ybSBvbiB1bmtub3duIHRlc3Qg
ZGF0YXNldCkKbW9kZWxsbTYgPSB0cmFpbihsb2cocHJpY2UpIH4gIGJlZHJvb21zICsgYmF0aHJvb21zICsgc3FmdF9saXZpbmcgKyB3YXRlcmZyb250ICsgdmlldyArIGNvbmRpdGlvbiArIGdyYWRlICsgeXJfYnVpbHQgKyB5cl9yZW5vdmF0ZWQgICsgbGF0ICsgc3FmdF9saXZpbmcxNSArIHNxZnRfbG90MTUgKyBJKGJlZHJvb21zXjIpICArIEkoc3FmdF9saXZpbmdeMikgKyBJKHZpZXdeMikgKyBJKGdyYWRlXjIpICsgSSh5cl9idWlsdF4yKSArIEkoeXJfcmVub3ZhdGVkXjIpICArIEkobGF0XjIpICsgSShzcWZ0X2xpdmluZzE1XjIpKyBJKHNxZnRfbG90MTVeMiksZGF0YSA9IHRyYWluaHNbLDM6MjJdLCBtZXRob2QgPSAibG0iLCB0ckNvbnRyb2wgPSB0cmFpbkNvbnRyb2wobWV0aG9kID0gImN2IixudW1iZXIgPSAxMCwgc2F2ZVByZWRpY3Rpb25zID0gVFJVRSkpCnN1bW1hcnkobW9kZWxsbTYpCiNwYXJhbWV0ZXJncmlkIDwtIGV4cGFuZC5ncmlkKGMoNSw2LDcsOCkpCiNzdW1tYXJ5KG1vZGVsbG0zKQoKCmBgYApgYGB7cn0KI2NoZWNraW5nIHJlc2lkdWFscyBwbG90IGZvciBlYWNoIHBhcmFtZXRlcgpnMSA9IGdncGxvdCh0cmFpbmhzLCBhZXMoYmF0aHJvb21zLCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmcyID0gZ2dwbG90KHRyYWluaHMsIGFlcyhzcWZ0X2xpdmluZywgcmVzaWR1YWxzKG1vZGVsbG0xKSkpICsgZ2VvbV9wb2ludCgpICsgZ2VvbV9zbW9vdGgoKQpnMyA9IGdncGxvdCh0cmFpbmhzLCBhZXModmlldywgcmVzaWR1YWxzKG1vZGVsbG0xKSkpICsgZ2VvbV9wb2ludCgpICsgZ2VvbV9zbW9vdGgoKQpnNCA9IGdncGxvdCh0cmFpbmhzLCBhZXMoZ3JhZGUsIHJlc2lkdWFscyhtb2RlbGxtMSkpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkKZzUgPSBnZ3Bsb3QodHJhaW5ocywgYWVzKHlyX2J1aWx0LCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmc2ID1nZ3Bsb3QodHJhaW5ocywgYWVzKHNxZnRfbGl2aW5nMTUsIHJlc2lkdWFscyhtb2RlbGxtMSkpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkKZzcgPSBnZ3Bsb3QodHJhaW5ocywgYWVzKGJlZHJvb21zLCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmc4ID0gZ2dwbG90KHRyYWluaHMsIGFlcyh3YXRlcmZyb250LCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmc5ID0gZ2dwbG90KHRyYWluaHMsIGFlcyhsYXQsIHJlc2lkdWFscyhtb2RlbGxtMSkpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkKZzEwID1nZ3Bsb3QodHJhaW5ocywgYWVzKHNxZnRfbG90MTUsIHJlc2lkdWFscyhtb2RlbGxtMSkpKSArIGdlb21fcG9pbnQoKSArIGdlb21fc21vb3RoKCkKZzExID1nZ3Bsb3QodHJhaW5ocywgYWVzKGNvbmRpdGlvbiwgcmVzaWR1YWxz
KG1vZGVsbG0xKSkpICsgZ2VvbV9wb2ludCgpICsgZ2VvbV9zbW9vdGgoKQpnMTIgPWdncGxvdCh0cmFpbmhzLCBhZXMoeXJfcmVub3ZhdGVkLCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCmcxMyA9Z2dwbG90KHRyYWluaHMsIGFlcyh6aXBjb2RlLCByZXNpZHVhbHMobW9kZWxsbTEpKSkgKyBnZW9tX3BvaW50KCkgKyBnZW9tX3Ntb290aCgpCgpncmlkLmFycmFuZ2UoZzEsZzIsZzMsZzQsIG5yb3cgPSAxLCBuY29sID0gNCkKZ3JpZC5hcnJhbmdlKGc1LGc2LGc3LGc4LCBucm93ID0gMSwgbmNvbCA9IDQpCmdyaWQuYXJyYW5nZShnOSxnMTAsZzExLGcxMixnMTMsIG5yb3cgPSAxLCBuY29sID0gNSkKCmBgYApgYGB7cn0KCiNtb2RlbCBjcmVhdGlvbiB1c2luZyBub25saW5lYXJsaXR5IG9mIHBhcmFtZXRlcnMgYmFzZWQgb24gcmVzaWR1YWxzIHBsb3QKI21vZGVsbG00ID0gbG0obG9nKHByaWNlKSB+ICBiZWRyb29tcyArIGJhdGhyb29tcyArIHNxZnRfbGl2aW5nICsgd2F0ZXJmcm9udCArIHZpZXcgKyBjb25kaXRpb24gKyBncmFkZSArIHlyX2J1aWx0ICsgeXJfcmVub3ZhdGVkICsgemlwY29kZSAgKyBsYXQgKyBzcWZ0X2xpdmluZzE1ICsgc3FmdF9sb3QxNSArIEkoYmVkcm9vbXNeMikgKyBJKGJhdGhyb29tc14yKSArIEkoc3FmdF9saXZpbmdeMikrIEkod2F0ZXJmcm9udF4yKSAgKyBJKHZpZXdeMikgKyBJKGNvbmRpdGlvbl4yKSArIEkoZ3JhZGVeMikgKyBJKHlyX2J1aWx0XjIpICsgSSh5cl9yZW5vdmF0ZWReMikgICtJKHppcGNvZGVeMikgICsgSShsYXReMikgKyBJKHNxZnRfbGl2aW5nMTVeMikrIEkoc3FmdF9sb3QxNV4yKSAsIGRhdGEgPSB0cmFpbmhzKQojc3VtbWFyeShtb2RlbGxtNCkKI3Bsb3QobW9kZWxsbTQpCm1vZGVsbG00ID0gbG0obG9nKHByaWNlKSB+ICBiZWRyb29tcyArIGJhdGhyb29tcyArIHNxZnRfbGl2aW5nICsgd2F0ZXJmcm9udCArIHZpZXcgKyBjb25kaXRpb24gKyBncmFkZSArIHlyX2J1aWx0ICsgeXJfcmVub3ZhdGVkICsgemlwY29kZSAgKyBsYXQgKyBzcWZ0X2xpdmluZzE1ICsgc3FmdF9sb3QxNSArIEkoYmVkcm9vbXNeMikgKyBJKGJhdGhyb29tc14yKSArIEkoc3FmdF9saXZpbmdeMikgICsgSShncmFkZV4yKSArIEkoeXJfYnVpbHReMikgKyBJKHlyX3Jlbm92YXRlZF4yKSAgK0koemlwY29kZV4yKSAgKyBJKGxhdF4yKSArIEkoc3FmdF9saXZpbmcxNV4yKSsgSShzcWZ0X2xvdDE1XjIpICwgZGF0YSA9IHRyYWluaHMpCnN1bW1hcnkobW9kZWxsbTQpCmBgYApgYGB7cn0KI21vZGVsIGNyZWF0aW9uIGFmdGVyIHJlbW92aW5nIGxlc3Mgc2lnbmlmaWNhbnQgcGFyYW1ldGVycyBpZGVudGlmaWVkIGluIG1vZGVsbG00CiNtb2RlbGxtNSA9IHVwZGF0ZShtb2RlbGxtNCwgfi4temlwY29kZS1JKGJhdGhyb29tc14yKS1JKHdhdGVyZnJvbnReMiktSShjb25kaXRpb25eMiktSSh6aXBjb2RlXjIpKQptb2RlbGxtNSA9IGxtKGxvZyhwcmljZSkgfiAgYmVk
cm9vbXMgKyBiYXRocm9vbXMgKyBzcWZ0X2xpdmluZyArIHdhdGVyZnJvbnQgKyB2aWV3ICsgY29uZGl0aW9uICsgZ3JhZGUgKyB5cl9idWlsdCArIHlyX3Jlbm92YXRlZCAgKyBsYXQgKyBzcWZ0X2xpdmluZzE1ICsgc3FmdF9sb3QxNSArIEkoYmVkcm9vbXNeMikgICsgSShzcWZ0X2xpdmluZ14yKSArIEkodmlld14yKSArIEkoZ3JhZGVeMikgKyBJKHlyX2J1aWx0XjIpICsgSSh5cl9yZW5vdmF0ZWReMikgICsgSShsYXReMikgKyBJKHNxZnRfbGl2aW5nMTVeMikrIEkoc3FmdF9sb3QxNV4yKSAsIGRhdGEgPSB0cmFpbmhzKQpzdW1tYXJ5KG1vZGVsbG01KQpwbG90KG1vZGVsbG01KQoKYGBgCmBgYHtyfQojIHJtc2UgZm9yIHJlZ3Jlc3Npb24gbW9kZWwKCnRlc3Rtb2RlbCA9IHByZWRpY3QobW9kZWxsbTcsIHRlc3Roc1ssMzoyMV0pCnBsb3QoZXhwKHRlc3Rtb2RlbCkgfiB0ZXN0aHMkcHJpY2UpCiNwbG90KHRlc3Rtb2RlbCB+IHRlc3RocyRwcmljZSkKYWJsaW5lKGEgPSAwLCBiID0gMSkKCnJlczEgPSAgZXhwKHRlc3Rtb2RlbCkgLSB0ZXN0aHMkcHJpY2UKI3JlczEgPSAgdGVzdG1vZGVsIC0gdGVzdGhzJHByaWNlCiNybXNlIDwtIHNxcnQoc3VtKChleHAodGVzdG1vZGVsKSAtIHRlc3RocyRwcmljZSleMikvbGVuZ3RoKHRlc3RocyRwcmljZSkpCnJtc2UgPSBzcXJ0KG1lYW4ocmVzMV4yKSkKcm1zZQoKYGBgCmBgYHtyfQoKI3RyeWluZyByYW5kb20gZm9yZXN0IG1vZGVsCm1vZGVscmYgID0gcmFuZG9tRm9yZXN0KHByaWNlIH4gLiwgdHJhaW5oc1ssMzoyMV0sbXRyeSA9IDYsIGltcG9ydGFuY2UgPSBUUlVFKQpzdW1tYXJ5KG1vZGVscmYpCmltcG9ydGFuY2UobW9kZWxyZikKdmFySW1wUGxvdChtb2RlbHJmLHR5cGUgPSAyKQpwYXJhID0gc3FmdF9saXZpbmcrZ3JhZGUrbGF0K3NxZnRfbGl2aW5nMTUrc3FmdF9hYm92ZStsb25nK2JhdGhyb29tcyt5cl9idWlsdCArIHZpZXcKYGBgCmBgYHtyfQoKCm1vZGVsbG03ID0gbG0obG9nKHByaWNlKSB+IHNxZnRfbGl2aW5nICsgZ3JhZGUgKyBsYXQgKyBzcWZ0X2xpdmluZzE1ICsgc3FmdF9hYm92ZSArIGxvbmcgKyBiYXRocm9vbXMgKyB5cl9idWlsdCArIHZpZXcsIGRhdGEgPSB0cmFpbmhzWywzOjIxXSkKc3VtbWFyeShtb2RlbGxtNykKYGBgCgpgYGB7cn0KI3Jtc2UgZm9yIHJhbmRvbSBmb3Jlc3QgbW9kZWwKdGVzdG1vZGVsX3JmID0gcHJlZGljdChtb2RlbHJmLCB0ZXN0aHMpCiNwMSA9IHBsb3QodGVzdGhzJHByaWNlIH4gZXhwKHRlc3Rtb2RlbCkpCiNhYmxpbmUoYSA9MCwgYiA9MSkKI3AyID0gcGxvdCh0ZXN0aHMkcHJpY2UgfiB0ZXN0bW9kZWxfcmYpCiNhYmxpbmUoYSA9MCwgYiA9MSkKCiNncmlkLmFycmFuZ2UocDEscDIsIG5yb3cgPSAxLCBuY29sID0gMikKcmVzMSA9ICB0ZXN0bW9kZWxfcmYgLSB0ZXN0aHMkcHJpY2UKcm1zZSA9IHNxcnQobWVhbihyZXMxXjIpKQpybXNlCmBgYAoK